Data Preparation
Code
library (tidyverse)
library (easystats)
library (patchwork)
library (ggside)
library (modelsummary)
df <- read.csv ("../data/rawdata.csv" ) |>
mutate (Intervention = ifelse (! is.na (Badnews_Questions_Duration), "BadNews" , "Tetris" )) |>
select (- Prolific_ID, - Date_OSF, - BadNews_Duration)
dfmist <- read.csv ("../data/rawdata_mist.csv" )
The initial sample consisted of 371 participants (Mean age = 49.7, SD = 101.1, range: [18, 1964], 0.3% missing; Gender: 52.3% women, 45.8% men, 1.89% non-binary; Education: Bachelor, 35.31%; Doctorate, 3.77%; High School, 36.39%; Master, 18.33%; Other, 6.20%), for a total trial number of 371.
Exclusion
Intervention Duration
Code
df |>
ggplot (aes (x= Intervention_Duration, fill= Intervention)) +
geom_density (alpha= 0.6 ) +
theme_minimal ()
Questionnaires
Political Identification
Code
df$ Political_LiberalConservative <- ifelse (df$ ANES_1 == 4 , NA , df$ ANES_1)
df$ Political_LiberalConservative <- ifelse (df$ Political_LiberalConservative > 3 ,
df$ Political_LiberalConservative - 1 ,
df$ Political_LiberalConservative)
df$ Political_Affiliation <- case_when (df$ ANES_2 == 1 ~ "Democrat" ,
df$ ANES_2 == 2 ~ "Republican" ,
df$ ANES_2 == 3 ~ "None or Independent" ,
.default = "Other" )
df <- select (df, - starts_with ("ANES" ))
The data contains 371 observations of the following 2 variables:
Political_LiberalConservative: n = 371, Mean = 2.85, SD = 1.79, Median = , MAD = 2.97, range: [0, 6], Skewness = 0.03, Kurtosis = -1.07, 1.08% missing
Political_Affiliation: 4 entries, such as Other (34.50%); Republican (33.42%); Democrat (31.81%); None or Independent (0.27%) (0 missing)
Code
df |>
filter (! is.na (Political_LiberalConservative) & Political_Affiliation != "None or Independent" ) |>
ggplot (aes (x= Political_LiberalConservative, fill= Political_Affiliation)) +
geom_density (alpha= 0.6 )
Authoritarianism (VSA)
Total Right-Wing Authoritarianism score: the sum of all items divided by 6. Note:Item 1 and 2 measure Conservatism or Authoritarian Submission. Items 3 and 4 measure Traditionalism or Conventionalism. Items 5 and 6 measure Authoritarianism or Authoritarian Aggression.
Code
vsa <- select (df, starts_with ("VSA_" ), - VSA_Duration)
plot (summary (correlation (vsa)))
Code
df$ VSA_Conservatism <- (datawizard:: reverse_scale (df$ VSA_1, range = c (0 , 8 )) + df$ VSA_2) / 2
df$ VSA_Traditionalism <- (df$ VSA_3 + datawizard:: reverse_scale (df$ VSA_4, range = c (0 , 8 ))) / 2
df$ VSA_Authoritarianism <- (datawizard:: reverse_scale (df$ VSA_5, range = c (0 , 8 )) + df$ VSA_6) / 2
df$ VSA_General <- rowMeans (df[, c ("VSA_Conservatism" , "VSA_Traditionalism" , "VSA_Authoritarianism" )], na.rm = TRUE )
modelsummary:: datasummary_skim (select (df, starts_with ("VSA_" )))
tinytable_zgfykz2sv8wgozddqspg
Unique
Missing Pct.
Mean
SD
Min
Median
Max
Histogram
VSA_Duration
371
0
2.1
11.8
0.1
0.9
223.7
VSA_1
9
0
4.1
2.5
0.0
4.0
8.0
VSA_2
9
0
3.6
2.5
0.0
3.0
8.0
VSA_3
9
0
3.2
2.9
0.0
3.0
8.0
VSA_4
9
0
5.4
2.6
0.0
6.0
8.0
VSA_5
9
0
4.3
2.4
0.0
4.0
8.0
VSA_6
9
0
4.3
2.5
0.0
5.0
8.0
VSA_Conservatism
17
0
3.8
2.1
0.0
3.5
8.0
VSA_Traditionalism
17
0
2.9
2.5
0.0
2.5
8.0
VSA_Authoritarianism
17
0
4.0
2.1
0.0
4.0
8.0
VSA_General
49
0
3.6
1.9
0.0
3.5
8.0
BFI
df$ BFI_Agreeableness <- (df$ BFI10_2 + (6 - df$ BFI10_7)) / 2
df$ BFI_Extraversion <- (df$ BFI10_6 + (6 - df$ BFI10_1)) / 2
df$ BFI_Conscientiousness <- (df$ BFI10_8 + (6 - df$ BFI10_3)) / 2
df$ BFI_Neuroticism <- (df$ BFI10_9 + (6 - df$ BFI10_4)) / 2
df$ BFI_Openness <- (df$ BFI10_10 + (6 - df$ BFI10_5)) / 2
Code
modelsummary:: datasummary_skim (select (df, starts_with ("BFI_" )))
tinytable_crkpi4ojkicrb6ia8axl
Unique
Missing Pct.
Mean
SD
Min
Median
Max
Histogram
BFI_Duration
371
0
1.2
2.6
0.3
0.8
46.7
BFI_Agreeableness
9
0
3.6
1.0
1.0
4.0
5.0
BFI_Extraversion
9
0
2.8
1.1
1.0
3.0
5.0
BFI_Conscientiousness
9
0
4.2
0.9
1.0
4.5
5.0
BFI_Neuroticism
9
0
2.6
1.2
1.0
2.5
5.0
BFI_Openness
9
0
3.8
0.9
1.0
4.0
5.0
GCBS
Code
GCBS <- select (df, starts_with ("GCBS" ))
df$ GCBS_GovernmentMalfeasance <- (df$ GCBS15_1 + df$ GCBS15_6 + df$ GCBS15_11) / 3
df$ GCBS_Extraterrestrial <- (df$ GCBS15_3 + df$ GCBS15_8 + df$ GCBS15_13) / 3
df$ GCBS_Malevolent <- (df$ GCBS15_2 + df$ GCBS15_7 + df$ GCBS15_12) / 3
df$ GCBS_Wellbeing <- (df$ GCBS15_4 + df$ GCBS15_9 + df$ GCBS15_14) / 3
df$ GCBS_Control <- (df$ GCBS15_5 + df$ GCBS15_10 + df$ GCBS15_15) / 3
df$ GCBS_General <- rowMeans (select (df, starts_with ("GCBS_" ), - GCBS_Duration), na.rm = TRUE )
plot (summary (correlation (select (df, starts_with ("GCBS_" ), - GCBS_Duration))))
Code
modelsummary:: datasummary_skim (GCBS)
tinytable_mmvsxpplal3z657yerq7
Unique
Missing Pct.
Mean
SD
Min
Median
Max
Histogram
GCBS_Duration
371
0
2.8
3.1
0.3
2.0
37.8
GCBS15_8
5
0
1.7
1.3
0.0
1.0
4.0
GCBS15_2
5
0
1.9
1.3
0.0
2.0
4.0
GCBS15_3
5
0
1.4
1.3
0.0
1.0
4.0
GCBS15_6
5
0
1.7
1.3
0.0
1.0
4.0
GCBS15_11
5
0
2.0
1.3
0.0
2.0
4.0
GCBS15_1
5
0
1.9
1.3
0.0
2.0
4.0
GCBS15_5
5
0
1.8
1.3
0.0
1.0
4.0
GCBS15_13
5
0
1.5
1.3
0.0
1.0
4.0
GCBS15_15
5
0
2.8
1.2
0.0
3.0
4.0
GCBS15_12
5
0
1.9
1.3
0.0
2.0
4.0
GCBS15_9
5
0
1.5
1.3
0.0
1.0
4.0
GCBS15_7
5
0
1.8
1.4
0.0
2.0
4.0
GCBS15_14
5
0
1.8
1.3
0.0
2.0
4.0
GCBS15_4
5
0
1.8
1.4
0.0
2.0
4.0
GCBS15_10
5
0
2.2
1.2
0.0
3.0
4.0
MOCRI
Code
## does this give us useful data for both the pre- and post-intervention MOCRI, or do I need to split my coding to be between the bold and nonbold (as this is how the MOCRI is split pre- and post-)?
MOCRI <- select (df, starts_with ("MOCRI_" ))
df <- select (df, - starts_with ("MOCRI" ))
# Recode in terms of correct incorrect
# df$MOCRI_Pretest_Manipulative1 <- ifelse(df$MOCRI_BOLD_MOCRI_BOLD_8_Manipulative == 1, 1, 0)
modelsummary:: datasummary_skim (MOCRI)
tinytable_hgvje1dobnbyy8bvknsq
Unique
Missing Pct.
Mean
SD
Min
Median
Max
Histogram
MOCRI_BOLD_Duration
371
0
2.7
2.2
0.3
2.2
22.2
MOCRI_BOLD_MOCRI_BOLD_4_NonManipulative
2
0
0.8
0.4
0.0
1.0
1.0
MOCRI_BOLD_MOCRI_BOLD_10_Manipulative
2
0
0.3
0.5
0.0
0.0
1.0
MOCRI_BOLD_MOCRI_BOLD_9_NonManipulative
2
0
0.7
0.5
0.0
1.0
1.0
MOCRI_BOLD_MOCRI_BOLD_8_Manipulative
2
0
0.3
0.5
0.0
0.0
1.0
MOCRI_BOLD_MOCRI_BOLD_5_NonManipulative
2
0
0.7
0.5
0.0
1.0
1.0
MOCRI_BOLD_MOCRI_BOLD_11_NonManipulative
2
0
0.6
0.5
0.0
1.0
1.0
MOCRI_BOLD_MOCRI_BOLD_3_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MOCRI_BOLD_MOCRI_BOLD_6_Manipulative
2
0
0.3
0.4
0.0
0.0
1.0
MOCRI_BOLD_MOCRI_BOLD_12_Manipulative
2
0
0.5
0.5
0.0
0.0
1.0
MOCRI_BOLD_MOCRI_BOLD_2_NonManipulative
2
0
0.7
0.4
0.0
1.0
1.0
MOCRI_BOLD_MOCRI_BOLD_7_NonManipulative
2
0
0.7
0.4
0.0
1.0
1.0
MOCRI_BOLD_MOCRI_BOLD_1_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MOCRI_NONBOLD_Duration
369
0
2.0
1.8
0.3
1.6
17.1
MOCRI_NONBOLD_MOCRI_10_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MOCRI_NONBOLD_MOCRI_6_Manipulative
2
0
0.3
0.4
0.0
0.0
1.0
MOCRI_NONBOLD_MOCRI_2_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MOCRI_NONBOLD_MOCRI_7_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MOCRI_NONBOLD_MOCRI_9_NonManipulative
2
0
0.8
0.4
0.0
1.0
1.0
MOCRI_NONBOLD_MOCRI_1_NonManipulative
2
0
0.9
0.4
0.0
1.0
1.0
MOCRI_NONBOLD_MOCRI_3_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MOCRI_NONBOLD_MOCRI_5_NonManipulative
2
0
0.7
0.4
0.0
1.0
1.0
MOCRI_NONBOLD_MOCRI_11_NonManipulative
2
0
0.6
0.5
0.0
1.0
1.0
MOCRI_NONBOLD_MOCRI_12_NonManipulative
2
0
0.6
0.5
0.0
1.0
1.0
MOCRI_NONBOLD_MOCRI_4_NonManipulative
2
0
0.8
0.4
0.0
1.0
1.0
MOCRI_NONBOLD_MOCRI_8_Manipulative
2
0
0.2
0.4
0.0
0.0
1.0
MIST
Code
dfmist <- dfmist |>
full_join (df[, c ("Participant" , "Intervention" )], by = "Participant" ) |>
mutate (temp = Item) |>
separate (temp, into = c ("extra" , "QuestionType" , "Topic" , "QuestionID" ), sep = "_" ) |>
select (- extra, - QuestionID) |>
mutate (
Correct = case_when (
MIST > 50 & QuestionType == "real" ~ 1 ,
MIST < 50 & QuestionType == "fake" ~ 1 ,
.default = 0
),
True_Positive = ifelse (MIST > 50 & QuestionType == "real" , 1 , 0 ),
True_Negative = ifelse (MIST < 50 & QuestionType == "fake" , 1 , 0 ),
False_Positive = ifelse (MIST > 50 & QuestionType == "fake" , 1 , 0 ),
False_Negative = ifelse (MIST < 50 & QuestionType == "real" , 1 , 0 )
)
Items
Code
# Participant scores
dfmist |>
summarise (p_Correct = sum (Correct) / n (), .by= c ("Item" )) |>
arrange (p_Correct) |>
gt:: gt () |>
gt:: fmt_number (columns = "p_Correct" , decimals = 2 ) |>
gt:: data_color (columns = "p_Correct" , palette= c ("red" , "green" ))
MIST_real_general_14
0.09
MIST_real_general_12
0.14
MIST_fake_general_10
0.15
MIST_real_general_11
0.18
MIST_fake_covid_1
0.22
MIST_real_general_10
0.22
MIST_fake_covid_6
0.26
MIST_fake_covid_5
0.27
MIST_fake_general_12
0.30
MIST_fake_covid_7
0.30
MIST_real_general_7
0.31
MIST_fake_covid_9
0.32
MIST_fake_covid_10
0.32
MIST_real_general_13
0.33
MIST_fake_covid_3
0.34
MIST_real_general_8
0.40
MIST_real_general_4
0.41
MIST_fake_covid_8
0.43
MIST_fake_general_2
0.46
MIST_fake_covid_2
0.47
MIST_real_general_9
0.49
MIST_real_general_2
0.54
MIST_fake_general_8
0.57
MIST_fake_general_26
0.58
MIST_real_covid_7
0.58
MIST_real_covid_1
0.61
MIST_fake_general_18
0.61
MIST_fake_general_30
0.62
MIST_fake_general_19
0.63
MIST_real_general_5
0.63
MIST_real_general_31
0.63
MIST_fake_general_13
0.64
MIST_fake_general_21
0.64
MIST_fake_general_29
0.64
MIST_real_covid_4
0.65
MIST_fake_general_22
0.65
MIST_fake_general_15
0.68
MIST_fake_general_25
0.68
MIST_fake_general_24
0.68
MIST_real_general_18
0.68
MIST_real_general_25
0.69
MIST_real_general_30
0.70
MIST_fake_general_14
0.70
MIST_fake_general_20
0.70
MIST_real_general_3
0.70
MIST_real_general_20
0.70
MIST_real_general_19
0.70
MIST_fake_general_6
0.71
MIST_real_general_26
0.72
MIST_fake_general_27
0.72
MIST_real_general_22
0.72
MIST_fake_general_28
0.73
MIST_real_general_17
0.73
MIST_real_general_1
0.73
MIST_real_general_16
0.73
MIST_fake_general_4
0.74
MIST_fake_general_3
0.75
MIST_fake_general_11
0.75
MIST_fake_general_5
0.75
MIST_real_covid_5
0.76
MIST_real_covid_6
0.77
MIST_real_covid_3
0.78
MIST_fake_general_1
0.78
MIST_real_general_15
0.78
MIST_fake_general_17
0.79
MIST_fake_general_9
0.79
MIST_real_general_28
0.79
MIST_fake_general_7
0.80
MIST_real_general_24
0.80
MIST_real_general_27
0.81
MIST_real_covid_10
0.81
MIST_fake_general_16
0.81
MIST_real_covid_9
0.81
MIST_real_general_23
0.83
MIST_fake_covid_4
0.85
MIST_fake_general_23
0.86
MIST_real_general_29
0.86
MIST_real_covid_8
0.87
MIST_real_general_21
0.87
MIST_real_general_6
0.87
MIST_real_covid_2
0.91
Scores
Code
compute_dprime <- function (data) {
# Calculate hit rate and false alarm rate
H <- (data$ True_Positive + 0.5 ) / (data$ True_Positive + data$ False_Negative + 1 ) # Adjusted Hit Rate
FA <- (data$ False_Positive + 0.5 ) / (data$ False_Positive + data$ True_Negative + 1 ) # Adjusted False Alarm Rate
# Parametric ----
# Compute z-scores
zH <- qnorm (H) # z-score for hit rate
zFA <- qnorm (FA) # z-score for false alarm rate
# d' and criterion
d_prime <- zH - zFA
criterion <- - 0.5 * (zH + zFA)
# Non-parametric ----
# A' (A-prime)
A_prime <- ifelse (
H > FA,
0.5 + ((H - FA) * (1 + H - FA)) / (4 * H * (1 - FA)),
ifelse (
H < FA,
0.5 + ((FA - H) * (1 + FA - H)) / (4 * FA * (1 - H)),
0.5
)
)
# B''d
B_double_prime <- ifelse (
H != FA,
((1 - H) * (1 - FA) - H * FA) / ((1 - H) * (1 - FA) + H * FA),
0
)
# Combine results
cbind (data, data.frame (
"dprime" = d_prime,
"criterion" = criterion,
"aprime" = A_prime,
"bppd" = B_double_prime
))
}
# Participant scores
df <- dfmist |>
summarise (
correct = sum (Correct) / n (),
True_Positive = sum (True_Positive),
True_Negative = sum (True_Negative),
False_Positive = sum (False_Positive),
False_Negative = sum (False_Negative),
.by = c ("Participant" , "Condition" , "Topic" )
) |>
compute_dprime () |>
select (Participant, Condition, Topic, correct, dprime, criterion, aprime, bppd) |>
pivot_wider (names_from= c ("Condition" , "Topic" ),
values_from= c ("correct" , "dprime" , "criterion" , "aprime" , "bppd" ),
names_vary = "slowest" ) |>
mutate (correct_Diff_covid = correct_Posttest_covid - correct_Pretest_covid,
correct_Diff_general = correct_Posttest_general - correct_Pretest_general,
dprime_Diff_covid = dprime_Posttest_covid - dprime_Pretest_covid,
dprime_Diff_general = dprime_Posttest_general - dprime_Pretest_general,
criterion_Diff_covid = criterion_Posttest_covid - criterion_Pretest_covid,
criterion_Diff_general = criterion_Posttest_general - criterion_Pretest_general,
aprime_Diff_covid = aprime_Posttest_covid - aprime_Pretest_covid,
aprime_Diff_general = aprime_Posttest_general - aprime_Pretest_general,
bppd_Diff_covid = bppd_Posttest_covid - bppd_Pretest_covid,
bppd_Diff_general = bppd_Posttest_general - bppd_Pretest_general) |>
datawizard:: data_addprefix ("MIST_" , select= - Participant) |>
full_join (df, by= "Participant" )
Final Sample
Gender
Code
ggplot (df, aes (x = Gender)) +
geom_bar (fill = "skyblue" ) +
labs (title = "Distribution of Gender" , x = "Gender" , y = "Count" )
Code
## RACE
# this won't currently work as it's pulling from the self-identification ethnicity column which has far too many distinct entries to be represented well visually. I am not sure how we could get the basic racial data from prolific but it's also not useful for any actual testing we want to do, so probably wouldn't worry about it
#ggplot(df, aes(x = Ethnicity)) +
# geom_bar(fill = "lightgreen") +
# labs(title = "Distribution of Race", x = "Race", y = "Count")
Education
Code
# Create the bar chart for Education
ggplot (df, aes (x = Education)) +
geom_bar (fill = "lightblue" ) +
labs (title = "Distribution of Education Level" ,
x = "Education Level" ,
y = "Count" ) +
theme_minimal () # Optional: Adds a minimal theme for better aesthetics
Save
Code
write.csv (df, "../data/data.csv" )
write.csv (dfmist, "../data/data_mist.csv" )